import  time,pymysql,datetime
import requests,os,re
from crawlab import save_item
from requests_html import HTMLSession

# Module-level MySQL connection shared by save_detil_date and the
# __main__ block below.
# NOTE(review): credentials are hard-coded here — consider moving them to
# environment variables or a config file before wider use.
conn = pymysql.connect(
    host="172.16.10.201",
    user="zy001",
    port=3306,
    password="zy@123",
    database="crawl-bk",
    charset = 'utf8'
)
# Single shared cursor reused throughout the script.
cursor = conn.cursor()
# Detail-page URLs previously stored by the list crawler.
sql = "select url from beike_community"



def save_detil_date(data, u):
    """Parse a Beike community detail page and update its DB row.

    Extracts the breadcrumb levels, the location line and the key/value
    attribute table from the page, then writes them back to
    ``beike_community`` keyed by the page URL.

    Args:
        data: requests_html.HTML object of the detail page.
        u: the detail-page URL (used as the WHERE key).
    """
    # Breadcrumb links — presumably city/district hierarchy (TODO confirm
    # against a live page).
    level1_el = data.xpath('//*[@id="beike"]/div[1]/div[2]/div[3]/div/div/a[2]', first=True)
    level2_el = data.xpath('//*[@id="beike"]/div[1]/div[2]/div[3]/div/div/a[3]', first=True)
    location_el = data.find('.sub', first=True)
    # Default to '' so a partially rendered page still updates cleanly
    # instead of raising AttributeError on a missing node.
    level1 = level1_el.text if level1_el is not None else ''
    level2 = level2_el.text if level2_el is not None else ''
    location = location_el.text if location_el is not None else ''

    # Map the attribute labels on the page to their DB column names.
    label_to_column = {
        "建筑类型": "building_type",
        "物业费用": "manage_fee",
        "物业公司": "manage_company",
        "开发商": "company",
        "楼栋总数": "build_num",
        "房屋总数": "house_num",
    }
    fields = {column: '' for column in label_to_column.values()}

    keys = data.xpath('//*[@id="beike"]/div[1]/div[3]/div[1]/div[2]/div[3]/div/span[1]')
    vals = data.xpath('//*[@id="beike"]/div[1]/div[3]/div[1]/div[2]/div[3]/div/span[2]')
    # zip pairs each label with its value directly; the original
    # key.index(i) lookup was O(n^2) and returned the wrong index when
    # two labels had identical text.
    for key_el, val_el in zip(keys, vals):
        for label, column in label_to_column.items():
            if label in key_el.text:
                fields[column] = val_el.text
                break

    # Parameterized query — the old str.format() build was vulnerable to
    # SQL injection (any quote in scraped text broke the statement).
    up_s = (
        "UPDATE beike_community SET level1 = %s, level2 = %s, company = %s, "
        "building_type = %s, manage_fee = %s, manage_company = %s, "
        "build_num = %s, house_num = %s, location = %s WHERE url = %s"
    )
    cursor.execute(up_s, (
        level1, level2, fields['company'], fields['building_type'],
        fields['manage_fee'], fields['manage_company'], fields['build_num'],
        fields['house_num'], location, u,
    ))
    conn.commit()
    print(u)
    print('成功')


if __name__ == '__main__':
    session = HTMLSession()

    # Phase 1: fetch each stored detail-page URL and persist parsed fields.
    cursor.execute(sql)
    datas = cursor.fetchall()
    print('存储详情')
    for row in datas:
        print(row)
        try:
            u = row[0]
            req = session.get(u)
            if req.status_code == 200:
                save_detil_date(req.html, u)
        except Exception as e:
            # Best-effort crawl: log the error and back off before the
            # next URL (failures here are usually rate limiting).
            print(e)
            time.sleep(40)

    # Phase 2: resolve each community's region via the search-suggest API.
    print('存储搜索位置')
    SQL_HREF = "SELECT community from  beike_community "
    cursor.execute(SQL_HREF)
    link = 'https://ajax.api.ke.com/sug/headerSearch'
    for row in cursor.fetchall():
        try:
            community = row[0]
            params = {
                'channel': 'xiaoqu',
                'cityId': '530100',  # presumably Kunming's city code — confirm
                'query': community,
            }
            r = session.get(link, params=params)
            payload = r.json()
            # Take the region of the first search result.
            region = payload.get('data').get('result')[0].get('region')
            # Parameterized query — the old string concatenation was open
            # to SQL injection from community names containing quotes.
            update_sql = 'UPDATE beike_community set region = %s where community = %s'
            cursor.execute(update_sql, (region, community))
            conn.commit()
        except Exception as e:
            # Missing/empty API results raise here; skip and continue.
            print(e)
            


# Export: push every enriched community row to Crawlab as a result item.
# A DictCursor makes each row a column-name -> value mapping, which is the
# shape save_item expects.
cursor = conn.cursor(pymysql.cursors.DictCursor)
cursor.execute('select * from beike_community')
for row in cursor.fetchall():
    save_item(row)

cursor.close()
conn.close()







